On cherche à étudier l’effet de trois facteurs sur le transcriptome des racines d’Arabidopsis thaliana. Le CO2, au cours des études préliminaires, s’est montré peu influent en conditions contrôles de fer et de nitrates, et accentué en cas de stress nutritionnel. Nous reprennons ces résultats avec des fonctions génériques et propres pour en faire le résumé et de jolis graphes.
On a, pour chaque gène et chaque condition, son niveau d’expression en sortie de quantification. On labelle les conditions avec le code suivant : lettre majuscule pour le niveau fort, minuscule pour le niveau faible. Le réplicat est donné après l’underscore.
# quantification file
data <- read.csv("quantifFiles/QuantifGenes.csv", h = T, sep = ",")
rownames(data) <- data$Gene
genes = which(!(grepl("__", rownames(data))))
not_quant = data[which((grepl("__", rownames(data)))), ]
data = data[genes, grepl("R", colnames(data))]
getLabel("R6")[1] "cnF_3"
[1] At_AmbientCO2_LowNitrate_Fe1
48 Levels: At_AmbientCO2_HighNitrate_Fe1 ... Sl_ElevatedCO2_LowNitrate_FeStarvation3
[1] "At_AmbientCO2_LowNitrate_Fe"
keep <- rowSums(data) >= 10
data <- data[keep, ]
group <- sapply(colnames(data), getLabel, with.rep = F)
colnames(data) <- sapply(colnames(data), getLabel)
head(data) cNF_3 cNF_2 cNF_1 cnF_2 cnF_1 cnF_3 CNF_1 CnF_2 CnF_1 CnF_3 cNf_1
AT1G01010 1526 1006 1116 1275 967 1018 854 1132 1294 1364 2325
AT1G01020 416 285 289 349 364 370 226 513 502 561 461
AT1G01030 31 15 19 29 36 28 12 47 34 47 18
cnf_2 cnf_1 cNf_2 cNf_3 cnf_3 Cnf_3 CNf_1 Cnf_1 Cnf_2 CNf_3 CNf_2
AT1G01010 2113 2193 2564 2364 2074 1987 2027 1754 1697 1537 1898
AT1G01020 407 432 614 380 502 484 467 426 415 413 462
AT1G01030 40 32 44 37 27 42 39 36 40 37 37
CNF_3 CNF_2
AT1G01010 816 912
AT1G01020 223 312
AT1G01030 15 19
[ reached 'max' / getOption("max.print") -- omitted 3 rows ]
[1] 23342 24
On définit les conditions contrôle comme suit : fort nitrate et fort fer.
method = "edger"
g = list()
# reference condition as first element of labels
labels <- c("cNF", "CNF")
genes1 <- dualDE(data, labels, pval = 0.01, method = method) (Intercept) groupCNF
1 1 0
2 1 0
3 1 0
4 1 1
5 1 1
6 1 1
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
[1] "cNF_3" "cNF_2" "cNF_1" "CNF_1" "CNF_3" "CNF_2"
cNF_3 cNF_2 cNF_1 CNF_1 CNF_3 CNF_2
1.0090325 1.0047763 0.9805842 1.0116521 0.9699686 1.0239863
[1] "68 genes DE"
[1] 17
a$flc <- genes1[match(a$ensembl_gene_id, genes1$gene_id), ]$m.value
a$p.value <- genes1[match(a$ensembl_gene_id, genes1$gene_id), ]$p.value
a <- a[order(a$p.value), ]
kable(a)| ensembl_gene_id | description | external_gene_name | entrezgene_id | flc | p.value | |
|---|---|---|---|---|---|---|
| 68 | AT5G64120 | Peroxidase [Source:UniProtKB/TrEMBL;Acc:A0A178UAR8] | PER71 | 836533 | -1.1121447 | 0.00e+00 |
| 8 | AT1G19250 | Probable flavin-containing monooxygenase 1 [Source:UniProtKB/Swiss-Prot;Acc:Q9LMA1] | FMO1 | 838508 | -0.8706103 | 0.00e+00 |
| 48 | AT4G25100 | Superoxide dismutase [Fe] 1, chloroplastic [Source:UniProtKB/Swiss-Prot;Acc:P21276] | FSD1 | 828613 | -0.9038503 | 0.00e+00 |
| 66 | AT5G59520 | Zinc transporter 2 [Source:UniProtKB/Swiss-Prot;Acc:Q9LTH9] | ZIP2 | 836071 | -1.2066060 | 0.00e+00 |
| 21 | AT2G01520 | MLP-like protein 328 [Source:UniProtKB/Swiss-Prot;Acc:Q9ZVF3] | MLP328 | 814681 | -0.9716064 | 0.00e+00 |
| 18 | AT1G67270 | Zinc-finger domain of monoamine-oxidase A repressor R1 protein [Source:TAIR;Acc:AT1G67270] | 843047 | -2.2140173 | 0.00e+00 | |
| 58 | AT5G23980 | Ferric reduction oxidase 4 [Source:UniProtKB/Swiss-Prot;Acc:Q8W110] | FRO4 | 832463 | -1.1156310 | 0.00e+00 |
| 54 | AT4G37060 | PATATIN-like protein 5 [Source:TAIR;Acc:AT4G37060] | PLP5 | 829860 | -0.8191550 | 0.00e+00 |
| 59 | AT5G23990 | ferric reduction oxidase 5 [Source:TAIR;Acc:AT5G23990] | ATFRO5 | 832464 | -2.0335514 | 0.00e+00 |
| 19 | AT1G73120 | At1g73120 [Source:UniProtKB/TrEMBL;Acc:Q9CAS9] | 843643 | -1.4553340 | 0.00e+00 | |
| 32 | AT3G12900 | 2-oxoglutarate (2OG) and Fe(II)-dependent oxygenase superfamily protein [Source:UniProtKB/TrEMBL;Acc:Q9LE86] | 820473 | -1.5018319 | 0.00e+00 | |
| 51 | AT4G33560 | At4g33560 [Source:UniProtKB/TrEMBL;Acc:O81873] | 829495 | 1.5512091 | 0.00e+00 | |
| 25 | AT2G35980 | NDR1/HIN1-like protein 10 [Source:UniProtKB/Swiss-Prot;Acc:Q9SJ52] | NHL10 | 818171 | -0.9522009 | 0.00e+00 |
| 52 | AT4G33720 | AT4g33720/T16L1_210 [Source:UniProtKB/TrEMBL;Acc:O81888] | 829514 | 0.6986856 | 0.00e+00 | |
| 50 | AT4G31940 | Cytochrome P450 82C4 [Source:UniProtKB/Swiss-Prot;Acc:Q9SZ46] | CYP82C4 | 829324 | -1.7819345 | 0.00e+00 |
| 12 | AT1G51900 | Regulator of Vps4 activity in the MVB pathway protein [Source:UniProtKB/TrEMBL;Acc:F4IB75] | 841617 | -1.5774468 | 0.00e+00 | |
| 36 | AT3G56290 | Potassium transporter [Source:UniProtKB/TrEMBL;Acc:Q9LYL4] | 824796 | 1.3120058 | 0.00e+00 | |
| 63 | AT5G44110 | ABC transporter I family member 21 [Source:UniProtKB/Swiss-Prot;Acc:Q9XF19] | ABCI21 | 834434 | 0.6189247 | 1.00e-07 |
| 38 | AT4G02330 | Probable pectinesterase/pectinesterase inhibitor 41 [Source:UniProtKB/Swiss-Prot;Acc:Q8RXK7] | PME41 | 828064 | -0.7802979 | 1.00e-07 |
| 15 | AT1G62510 | Bifunctional inhibitor/lipid-transfer protein/seed storage 2S albumin superfamily protein [Source:UniProtKB/TrEMBL;Acc:Q9SXE6] | 842548 | 0.8382989 | 1.00e-07 | |
| 4 | AT1G05880 | Probable E3 ubiquitin-protein ligase ARI12 [Source:UniProtKB/Swiss-Prot;Acc:Q84RQ9] | ARI12 | 837098 | -0.6493739 | 1.00e-07 |
| 26 | AT2G39040 | Peroxidase 24 [Source:UniProtKB/Swiss-Prot;Acc:Q9ZV04] | PER24 | 818490 | 0.6387685 | 2.00e-07 |
| 10 | AT1G43800 | Stearoyl-[acyl-carrier-protein] 9-desaturase 6, chloroplastic [Source:UniProtKB/Swiss-Prot;Acc:Q84VY3] | S-ACP-DES6 | 840977 | 1.0557551 | 3.00e-07 |
| 53 | AT4G33980 | BEST Arabidopsis thaliana protein match is: cold regulated gene 27 (TAIR:AT5G42900.2); Ha. [Source:TAIR;Acc:AT4G33980] | 829544 | -0.6979390 | 3.00e-07 | |
| 41 | AT4G11170 | Putative disease resistance protein At4g11170 [Source:UniProtKB/Swiss-Prot;Acc:O82500] | 826718 | -0.9537231 | 4.00e-07 | |
| 44 | AT4G14368 | Regulator of chromosome condensation (RCC1) family protein [Source:UniProtKB/TrEMBL;Acc:F4JVE8] | 7922506 | -1.4474268 | 4.00e-07 | |
| 62 | AT5G39890 | Plant cysteine oxidase 2 [Source:UniProtKB/Swiss-Prot;Acc:Q8LGJ5] | PCO2 | 833986 | 0.7311023 | 6.00e-07 |
| 11 | AT1G44030 | Cysteine/Histidine-rich C1 domain family protein [Source:TAIR;Acc:AT1G44030] | 841003 | -0.9133634 | 6.00e-07 | |
| 61 | AT5G39580 | Peroxidase 62 [Source:UniProtKB/Swiss-Prot;Acc:Q9FKA4] | PER62 | 833954 | -0.5805185 | 6.00e-07 |
| 49 | AT4G25480 | Dehydration-responsive element-binding protein 1A [Source:UniProtKB/Swiss-Prot;Acc:Q9M0L0] | DREB1A | 828652 | 1.0852608 | 7.00e-07 |
| 28 | AT3G03270 | AT3G03270 protein [Source:UniProtKB/TrEMBL;Acc:Q8LFK2] | 821303 | 0.7571787 | 8.00e-07 | |
| 13 | AT1G55790 | Domain of unknown function (DUF2431) [Source:TAIR;Acc:AT1G55790] | NA | -1.1062225 | 9.00e-07 | |
| 55 | AT4G38420 | Putative pectinesterase [Source:UniProtKB/TrEMBL;Acc:Q8VYB3] | sks9 | 829999 | -1.3042191 | 1.10e-06 |
| 65 | AT5G56080 | NAS2 [Source:UniProtKB/TrEMBL;Acc:A0A178ULG9] | NAS2 | 835707 | -1.2850144 | 1.10e-06 |
| 47 | AT4G21380 | Receptor-like serine/threonine-protein kinase SD1-8 [Source:UniProtKB/Swiss-Prot;Acc:O81905] | SD18 | 827890 | -1.4446904 | 1.20e-06 |
| 7 | AT1G14550 | Peroxidase 5 [Source:UniProtKB/Swiss-Prot;Acc:Q9M9Q9] | PER5 | 838017 | -1.1463307 | 1.70e-06 |
| 64 | AT5G44190 | Transcription activator GLK2 [Source:UniProtKB/Swiss-Prot;Acc:Q9FFH0] | GLK2 | 834442 | 0.6770205 | 1.90e-06 |
| 3 | AT1G01680 | U-box domain-containing protein 54 [Source:UniProtKB/Swiss-Prot;Acc:Q9LQ92] | PUB54 | 839251 | -0.8500759 | 2.00e-06 |
| 9 | AT1G21240 | Wall-associated receptor kinase 3 [Source:UniProtKB/Swiss-Prot;Acc:Q9LMN8] | WAK3 | 838719 | -2.0677901 | 2.00e-06 |
| 23 | AT2G02950 | Protein PHYTOCHROME KINASE SUBSTRATE 1 [Source:UniProtKB/Swiss-Prot;Acc:Q9SWI1] | PKS1 | 814823 | 0.8424637 | 2.20e-06 |
| 56 | AT5G02780 | Glutathione S-transferase L1 [Source:UniProtKB/Swiss-Prot;Acc:Q6NLB0] | GSTL1 | 831800 | -0.8407823 | 2.50e-06 |
| 22 | AT2G02010 | glutamate decarboxylase 4 [Source:TAIR;Acc:AT2G02010] | GAD4 | 814732 | -1.5060009 | 2.90e-06 |
| 27 | AT2G45760 | BON1-associated protein 2 [Source:UniProtKB/Swiss-Prot;Acc:Q58FX0] | BAP2 | 819184 | -1.1701408 | 3.30e-06 |
| 20 | AT1G76930 | extensin 4 [Source:TAIR;Acc:AT1G76930] | ATEXT4 | NA | -0.6165278 | 3.60e-06 |
| 33 | AT3G27650 | LOB domain-containing protein 25 [Source:UniProtKB/Swiss-Prot;Acc:Q8L8Q3] | LBD25 | 822387 | 0.7542595 | 4.00e-06 |
| 6 | AT1G12805 | Nucleotide binding protein [Source:UniProtKB/TrEMBL;Acc:Q3EDD5] | 2745749 | 1.3551609 | 4.50e-06 | |
| 16 | AT1G64170 | Cation/H(+) antiporter 16 [Source:UniProtKB/Swiss-Prot;Acc:Q1HDT3] | CHX16 | 842721 | -0.5285999 | 6.50e-06 |
| 2 | AT1G01560 | Mitogen-activated protein kinase 11 [Source:UniProtKB/Swiss-Prot;Acc:Q9LMM5] | MPK11 | 839523 | -0.8915834 | 6.90e-06 |
| 39 | AT4G10500 | Protein DMR6-LIKE OXYGENASE 1 [Source:UniProtKB/Swiss-Prot;Acc:Q9ZSA8] | DLO1 | 826642 | -0.6512606 | 7.10e-06 |
| 37 | AT3G63380 | Calcium-transporting ATPase [Source:UniProtKB/TrEMBL;Acc:A0A178VEV7] | ACA12 | 825513 | -0.6351966 | 7.10e-06 |
| 34 | AT3G53150 | UDP-glucosyl transferase 73D1 [Source:TAIR;Acc:AT3G53150] | UGT73D1 | NA | -0.5938919 | 7.20e-06 |
| 24 | AT2G26040 | Abscisic acid receptor PYL2 [Source:UniProtKB/Swiss-Prot;Acc:O80992] | PYL2 | 817145 | 1.0224290 | 8.80e-06 |
| 14 | AT1G61480 | G-type lectin S-receptor-like serine/threonine-protein kinase At1g61480 [Source:UniProtKB/Swiss-Prot;Acc:O64771] | 842442 | -0.6212826 | 9.30e-06 | |
| 1 | AT1G01060 | LHY1 [Source:UniProtKB/TrEMBL;Acc:A0A178W761] | LHY | 839341 | 0.8888039 | 9.90e-06 |
| 17 | AT1G64770 | NDH-dependent cyclic electron flow 1 [Source:UniProtKB/TrEMBL;Acc:F4I890] | NDF2 | 842785 | -1.9135721 | 1.02e-05 |
| 5 | AT1G08090 | High-affinity nitrate transporter 2.1 [Source:UniProtKB/Swiss-Prot;Acc:O82811] | NRT2.1 | 837327 | -0.6404532 | 1.11e-05 |
| 40 | AT4G10510 | Subtilisin-like protease SBT3.7 [Source:UniProtKB/Swiss-Prot;Acc:Q9SZY2] | SBT3.7 | 826643 | -0.6200603 | 1.23e-05 |
| 43 | AT4G12550 | Putative lipid-binding protein AIR1 [Source:UniProtKB/Swiss-Prot;Acc:Q9S7I2] | AIR1 | 826868 | -0.6493248 | 1.26e-05 |
| 42 | AT4G11340 | Disease resistance protein (TIR-NBS-LRR class) family [Source:UniProtKB/TrEMBL;Acc:Q9SUS7] | 826736 | -1.3943382 | 1.32e-05 | |
| 29 | AT3G07650 | COL9 [Source:UniProtKB/TrEMBL;Acc:A0A384KVH3] | COL9 | 819956 | -0.6444135 | 1.33e-05 |
| 57 | AT5G22910 | Cation/H(+) antiporter 9 [Source:UniProtKB/Swiss-Prot;Acc:Q9FFB7] | CHX9 | 832355 | -1.2526964 | 1.38e-05 |
| 67 | AT5G60100 | Pseudo-response regulator 3 [Source:UniProtKB/TrEMBL;Acc:F4JXG7] | PRR3 | 836132 | -0.5868534 | 1.39e-05 |
| 30 | AT3G08040 | Protein DETOXIFICATION 43 [Source:UniProtKB/Swiss-Prot;Acc:Q9SFB0] | DTX43 | 819995 | -1.0088611 | 1.41e-05 |
| 60 | AT5G26920 | Calmodulin-binding protein 60 G [Source:UniProtKB/Swiss-Prot;Acc:F4K2R6] | CBP60G | 832750 | -0.5202703 | 1.48e-05 |
| 31 | AT3G08970 | TMS1 [Source:UniProtKB/TrEMBL;Acc:A0A178VJB6] | ERDJ3A | 820049 | -0.7287766 | 1.70e-05 |
| 35 | AT3G53280 | Cytochrome P450 71B5 [Source:UniProtKB/Swiss-Prot;Acc:O65784] | CYP71B5 | 824495 | -1.4499214 | 2.09e-05 |
| 46 | AT4G15248 | B-box domain protein 30 [Source:UniProtKB/Swiss-Prot;Acc:Q1G3I2] | MIP1A | 3769879 | 1.7519383 | 2.62e-05 |
| 45 | AT4G14370 | Disease resistance protein (TIR-NBS-LRR class) family [Source:TAIR;Acc:AT4G14370] | 827081 | -0.6972969 | 2.83e-05 |
(Intercept) groupCnF
1 1 0
2 1 0
3 1 0
4 1 1
5 1 1
6 1 1
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
[1] "cnF_2" "cnF_1" "cnF_3" "CnF_2" "CnF_1" "CnF_3"
cnF_2 cnF_1 cnF_3 CnF_2 CnF_1 CnF_3
0.9862253 0.9873030 0.9924054 1.0167225 1.0047221 1.0126218
[1] "1312 genes DE"
(Intercept) groupCNf
1 1 0
2 1 0
3 1 0
4 1 1
5 1 1
6 1 1
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
[1] "cNf_1" "cNf_2" "cNf_3" "CNf_1" "CNf_3" "CNf_2"
cNf_1 cNf_2 cNf_3 CNf_1 CNf_3 CNf_2
0.9799002 1.0044883 0.9723876 0.9981139 1.0241404 1.0209697
[1] "2219 genes DE"
(Intercept) groupCnf
1 1 0
2 1 0
3 1 0
4 1 1
5 1 1
6 1 1
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
[1] "cnf_2" "cnf_1" "cnf_3" "Cnf_3" "Cnf_1" "Cnf_2"
cnf_2 cnf_1 cnf_3 Cnf_3 Cnf_1 Cnf_2
0.9975633 1.0046643 1.0015093 0.9727479 1.0064673 1.0170479
[1] "1656 genes DE"
On visualise les gènes différentiellement exprimés en commun entre les différents niveaux des autres facteurs.
library(ggVennDiagram)
library(VennDiagram)
gene_list <- list()
for (comp in names(g)) {
print(g[[comp]])
gene_list[[comp]] <- g[[comp]]$gene_id
} gene_id a.value m.value p.value q.value rank estimatedDEG
1 AT5G64120 8.797218 -1.1121447 1.895668e-15 4.424869e-11 1 1
2 AT1G19250 9.948535 -0.8706103 1.978657e-14 1.946764e-10 2 1
3 AT4G25100 9.324940 -0.9038503 2.502053e-14 1.946764e-10 3 1
4 AT5G59520 8.819711 -1.2066060 1.693363e-12 9.041343e-09 4 1
5 AT2G01520 14.038784 -0.9716064 1.936711e-12 9.041343e-09 5 1
6 AT1G67270 5.104540 -2.2140173 3.327779e-12 1.294617e-08 6 1
7 AT5G23980 8.719121 -1.1156310 2.014881e-10 6.718766e-07 7 1
8 AT4G37060 10.229806 -0.8191550 6.994221e-10 2.040739e-06 8 1
9 AT5G23990 4.823373 -2.0335514 9.556966e-10 2.478652e-06 9 1
upreg
1 0
2 0
3 0
4 0
5 0
6 0
7 0
8 0
9 0
[ reached 'max' / getOption("max.print") -- omitted 59 rows ]
gene_id a.value m.value p.value q.value rank estimatedDEG
1 AT5G61600 10.978022 1.715345 5.768501e-56 1.346484e-51 1 1
2 AT4G27450 10.987889 1.361106 1.241560e-51 1.449024e-47 2 1
3 AT1G27730 9.421760 2.187544 2.462590e-51 1.916059e-47 3 1
4 AT5G51190 8.417337 2.782680 4.320997e-49 2.521518e-45 4 1
5 AT2G27830 9.879502 1.316552 1.397834e-39 6.525650e-36 5 1
6 AT1G19530 12.542251 1.538979 1.842265e-38 7.167027e-35 6 1
7 AT4G27280 10.488397 1.691216 4.608884e-38 1.536865e-34 7 1
8 AT5G13930 10.407278 -1.747350 6.719526e-38 1.960590e-34 8 1
9 AT3G44260 8.737846 2.483672 1.820216e-36 4.720832e-33 9 1
upreg
1 1
2 1
3 1
4 1
5 1
6 1
7 1
8 0
9 1
[ reached 'max' / getOption("max.print") -- omitted 1303 rows ]
gene_id a.value m.value p.value q.value rank estimatedDEG
1 AT1G78340 10.002535 -3.261719 5.376231e-136 1.254920e-131 1 1
2 AT4G13180 10.301728 -2.811270 2.994626e-121 3.495029e-117 2 1
3 AT4G15550 8.217350 -3.781600 5.565167e-117 4.330071e-113 3 1
4 AT4G34131 8.946763 -2.699004 1.599866e-95 9.336015e-92 4 1
5 AT1G76680 9.778808 -3.051667 2.783462e-90 1.299431e-86 5 1
6 AT3G28345 9.654586 -2.538451 3.176643e-89 1.235820e-85 6 1
7 AT3G45060 9.161753 2.510301 5.456194e-82 1.819407e-78 7 1
8 AT1G76690 9.762616 -2.235867 5.470386e-76 1.596122e-72 8 1
9 AT3G28740 4.011621 -7.093650 9.091593e-76 2.357955e-72 9 1
upreg
1 0
2 0
3 0
4 0
5 0
6 0
7 1
8 0
9 0
[ reached 'max' / getOption("max.print") -- omitted 2210 rows ]
gene_id a.value m.value p.value q.value rank estimatedDEG
1 AT3G49620 8.971142 3.325629 4.576332e-77 1.068207e-72 1 1
2 AT2G26400 12.032573 -2.107176 1.946526e-64 2.271791e-60 2 1
3 AT1G21100 9.986600 1.996729 3.562033e-43 2.771499e-39 3 1
4 AT1G69530 9.527594 -1.507353 2.066807e-40 1.206085e-36 4 1
5 AT5G51860 6.042441 3.573607 4.906245e-40 2.290431e-36 5 1
6 AT5G51870 8.032896 1.826646 6.948595e-39 2.703235e-35 6 1
7 AT1G08630 8.692334 2.049414 1.918515e-37 6.397426e-34 7 1
8 AT1G56600 8.952644 -1.850385 1.108793e-36 3.235181e-33 8 1
9 AT1G43800 11.887403 -2.934754 2.086789e-36 5.412203e-33 9 1
upreg
1 1
2 0
3 1
4 0
5 1
6 1
7 1
8 0
9 0
[ reached 'max' / getOption("max.print") -- omitted 1647 rows ]
partitions <- get.venn.partitions(gene_list, keep.elements = T)
partitions$shared <- rowSums(partitions[1:4])
partitions <- partitions[order(-partitions$shared), ]
sharedBy3 <- unique(unlist(subset(partitions, partitions$shared == 3)$..values..))
a <- OntologyProfile(sharedBy3, specie = specie) ensembl_gene_id
1 AT1G02640
2 AT1G11185
3 AT1G11260
4 AT1G11920
5 AT1G12040
6 AT1G12805
7 AT1G17180
8 AT1G18400
9 AT1G19250
10 AT1G27950
11 AT1G29100
12 AT1G33055
13 AT1G43800
14 AT1G48930
15 AT1G54950
16 AT1G54970
17 AT1G55790
18 AT1G56580
description
1 Probable beta-D-xylosidase 2 [Source:UniProtKB/Swiss-Prot;Acc:Q94KD8]
2 other RNA [Source:TAIR;Acc:AT1G11185]
3 STP1 [Source:UniProtKB/TrEMBL;Acc:A0A178WJ63]
4 Putative pectate lyase 2 [Source:UniProtKB/Swiss-Prot;Acc:O65388]
5 Leucine-rich repeat extensin-like protein 1 [Source:UniProtKB/Swiss-Prot;Acc:O65375]
6 Nucleotide binding protein [Source:UniProtKB/TrEMBL;Acc:Q3EDD5]
7 Glutathione S-transferase U25 [Source:UniProtKB/Swiss-Prot;Acc:Q9SHH7]
8 Transcription factor BEE 1 [Source:UniProtKB/Swiss-Prot;Acc:Q8GZ13]
9 Probable flavin-containing monooxygenase 1 [Source:UniProtKB/Swiss-Prot;Acc:Q9LMA1]
10 Non-specific lipid transfer protein GPI-anchored 1 [Source:UniProtKB/Swiss-Prot;Acc:Q9C7F7]
11 Heavy metal transport/detoxification superfamily protein [Source:TAIR;Acc:AT1G29100]
12 Uncharacterized protein unannotated coding sequence from BAC F9L11 [Source:UniProtKB/TrEMBL;Acc:Q94K21]
13 Stearoyl-[acyl-carrier-protein] 9-desaturase 6, chloroplastic [Source:UniProtKB/Swiss-Prot;Acc:Q84VY3]
14 Endoglucanase 5 [Source:UniProtKB/Swiss-Prot;Acc:Q9M995]
15 unknown protein; FUNCTIONS IN: molecular_function unknown; INVOLVED IN: biological_process unknown; LOCATED IN: endomembrane system; BEST Arabidopsis thaliana protein match is: unknown protein (TAIR:AT1G07690.1); Ha. [Source:TAIR;Acc:AT1G54950]
16 Proline-rich protein 1 [Source:UniProtKB/Swiss-Prot;Acc:Q9FZ35]
17 Domain of unknown function (DUF2431) [Source:TAIR;Acc:AT1G55790]
18 At1g56580/F25P12_18 [Source:UniProtKB/TrEMBL;Acc:Q9FXB0]
external_gene_name entrezgene_id
1 BXL2 837940
2 NA
3 STP1 837667
4 837742
5 LRX1 837756
6 2745749
7 GSTU25 838289
8 BEE1 838421
9 FMO1 838508
10 LTPG1 839688
11 839785
12 840201
13 S-ACP-DES6 840977
14 AtGH9C1 841315
15 841935
16 PRP1 841938
17 NA
18 SVB 842112
[ reached 'max' / getOption("max.print") -- omitted 82 rows ]
For each comparison, we want to see how many genes are detected as DE.
# getExpression('AT2G35980', conds = c('cNF', 'CNF'))
d <- data.frame(matrix(ncol = 4, nrow = 2))
colnames(d) <- names(g)
row.names(d) <- c("up", "down")
for (comp in names(g)) {
d["up", comp] <- sum(g[[comp]]$upreg == 1)
d["down", comp] <- sum(g[[comp]]$upreg == 0)
}
res <- melt(d)
res$reg = rep(c("up", "down"), 4)
genesco2At <- res
save(genesco2At, file = "genesco2At.RData")
ggplot(res, aes(fill = reg, y = value, x = variable)) + geom_bar(position = "stack",
stat = "identity", alpha = 0.5, color = "black") + ggtitle("Carbon dioxide effet on gene regulation") +
xlab("") + ylab("Number of differentially expressed genes") + coord_flip()On s’interroge sur l’effet de la double carence fer et nitrate en CO2 ambiant et élevé. On ne l’avait pas fait car cela fait varer deux facteurs à la fois.
(Intercept) groupcNF
1 1 0
2 1 0
3 1 0
4 1 1
5 1 1
6 1 1
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
[1] "cnf_2" "cnf_1" "cnf_3" "cNF_3" "cNF_2" "cNF_1"
cnf_2 cnf_1 cnf_3 cNF_3 cNF_2 cNF_1
1.0242524 1.0393358 1.0253221 0.9831632 0.9914864 0.9364401
[1] "9069 genes DE"
(Intercept) groupCNF
1 1 0
2 1 0
3 1 0
4 1 1
5 1 1
6 1 1
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
[1] "Cnf_3" "Cnf_1" "Cnf_2" "CNF_1" "CNF_3" "CNF_2"
Cnf_3 Cnf_1 Cnf_2 CNF_1 CNF_3 CNF_2
0.9823882 1.0318759 1.0281336 1.0008911 0.9447685 1.0119428
[1] "8309 genes DE"
doubleInt <- list(`ambiant CO2` = genes5$gene_id, `elevanted CO2` = genes6$gene_id)
ggVennDiagram(doubleInt) ensembl_gene_id
1 AT1G01720
2 AT1G01750
3 AT1G02575
4 AT1G02640
5 AT1G02850
6 AT1G03790
7 AT1G03870
8 AT1G04250
9 AT1G05650
10 AT1G05660
11 AT1G06120
12 AT1G07680
13 AT1G07690
14 AT1G07750
15 AT1G08080
16 AT1G08100
17 AT1G08290
18 AT1G08500
description
1 NAC domain-containing protein 2 [Source:UniProtKB/Swiss-Prot;Acc:Q39013]
2 actin depolymerizing factor 11 [Source:TAIR;Acc:AT1G01750]
3 unknown protein; BEST Arabidopsis thaliana protein match is: unknown protein (TAIR:AT1G02570.1). [Source:TAIR;Acc:AT1G02575]
4 Probable beta-D-xylosidase 2 [Source:UniProtKB/Swiss-Prot;Acc:Q94KD8]
5 Beta-glucosidase 11 [Source:UniProtKB/Swiss-Prot;Acc:B3H5Q1]
6 Zinc finger CCCH domain-containing protein 2 [Source:UniProtKB/Swiss-Prot;Acc:Q9ZWA1]
7 At1g03870 [Source:UniProtKB/TrEMBL;Acc:B3LF88]
8 Auxin-responsive protein IAA17 [Source:UniProtKB/Swiss-Prot;Acc:P93830]
9 F3F20.10 protein [Source:UniProtKB/TrEMBL;Acc:Q9SYK6]
10 F3F20.11 protein [Source:UniProtKB/TrEMBL;Acc:Q9SYK7]
11 Delta-9 desaturase-like 3 protein [Source:UniProtKB/Swiss-Prot;Acc:Q9FPD5]
12 F24B9.23 [Source:UniProtKB/TrEMBL;Acc:Q9LQP3]
13 FUNCTIONS IN: molecular_function unknown; INVOLVED IN: biological_process unknown; LOCATED IN: endomembrane system; EXPRESSED IN: root; CONTAINS InterPro DOMAIN/s: Orthoreovirus membrane fusion p10 (InterPro:IPR009854); BEST Arabidopsis thaliana pro /.../atch is: unknown protein (TAIR:AT1G54950.1); Ha. [Source:TAIR;Acc:AT1G07690]
14 At1g07750/F24B9_13 [Source:UniProtKB/TrEMBL;Acc:Q9LQQ3]
15 Alpha carbonic anhydrase 7 [Source:UniProtKB/Swiss-Prot;Acc:Q8L817]
16 NRT2 [Source:UniProtKB/TrEMBL;Acc:A0A178W7F7]
17 Zinc finger protein WIP3 [Source:UniProtKB/Swiss-Prot;Acc:Q9SGD1]
18 Early nodulin-like protein 18 [Source:UniProtKB/TrEMBL;Acc:O82083]
external_gene_name entrezgene_id
1 NAC002 839265
2 ADF11 839281
3 10723100
4 BXL2 837940
5 BGLU11 839435
6 SOM 839408
7 FLA9 839384
8 IAA17 839568
9 837072
10 837073
11 837121
12 837281
13 837282
14 837290
15 ACA7 837326
16 NRT2.2 837328
17 WIP3 837349
18 ENODL18 837371
[ reached 'max' / getOption("max.print") -- omitted 6632 rows ]
On trouve énormément de gènes, ce qui est cohérent car le nitrate et le fer pris séparément avaient déjà beaucoup d’effet.
On cherche à savoir si il n’y pas pas un soucis avec l’échantillon CNF, qui semble avoi très très peu de différences avec cNF. Aurait-on envoyé les mêmes tubes?
On ne voit que très peu de DEGs en comparant directement cNF et CNF, on cherche donc à savoir si on trouve le même nombre de DEGs quand on compare cNF - x et CNF -x, ce qui validerait l’hypothèse d’un transcriptôme quasi identique. On choisit ici par exemple \(x=\) cnF.
res <- c()
for (x in c("cnF", "cnf", "cNf", "CnF", "Cnf", "CNf")) {
labels <- c("cNF", x)
genes7 <- dualDE(data, labels, pval = 0.01, plot = F)
labels <- c("CNF", x)
genes8 <- dualDE(data, labels, pval = 0.01, plot = F)
double <- list(`ambiant CO2` = genes7$gene_id, `elevanted CO2` = genes8$gene_id)
print(ggVennDiagram(double))
partitions <- get.venn.partitions(double)
res <- c(res, (partitions[2, "..count.."] + partitions[3, "..count.."])/sum(partitions[,
"..count.."]) * 100)
} (Intercept) groupcnF
1 1 0
2 1 0
3 1 0
4 1 1
5 1 1
6 1 1
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
[1] "cNF_3" "cNF_2" "cNF_1" "cnF_2" "cnF_1" "cnF_3"
cNF_3 cNF_2 cNF_1 cnF_2 cnF_1 cnF_3
0.9859482 0.9865531 0.9545113 1.0150637 1.0299591 1.0279646
[1] "2803 genes DE"
(Intercept) groupcnF
1 1 0
2 1 0
3 1 0
4 1 1
5 1 1
6 1 1
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
[1] "CNF_1" "CNF_3" "CNF_2" "cnF_2" "cnF_1" "cnF_3"
CNF_1 CNF_3 CNF_2 cnF_2 cnF_1 cnF_3
0.9885185 0.9468228 1.0077432 1.0150001 1.0207525 1.0211628
[1] "1928 genes DE"
(Intercept) groupcnf
1 1 0
2 1 0
3 1 0
4 1 1
5 1 1
6 1 1
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
[1] "cNF_3" "cNF_2" "cNF_1" "cnf_2" "cnf_1" "cnf_3"
cNF_3 cNF_2 cNF_1 cnf_2 cnf_1 cnf_3
0.9831632 0.9914864 0.9364401 1.0242524 1.0393358 1.0253221
[1] "9069 genes DE"
(Intercept) groupcnf
1 1 0
2 1 0
3 1 0
4 1 1
5 1 1
6 1 1
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
[1] "CNF_1" "CNF_3" "CNF_2" "cnf_2" "cnf_1" "cnf_3"
CNF_1 CNF_3 CNF_2 cnf_2 cnf_1 cnf_3
0.9883704 0.9318299 1.0000135 1.0208924 1.0390928 1.0198009
[1] "8972 genes DE"
(Intercept) groupcNf
1 1 0
2 1 0
3 1 0
4 1 1
5 1 1
6 1 1
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
[1] "cNF_3" "cNF_2" "cNF_1" "cNf_1" "cNf_2" "cNf_3"
cNF_3 cNF_2 cNF_1 cNf_1 cNf_2 cNf_3
0.9844303 0.9894266 0.9466501 1.0201203 1.0475235 1.0118491
[1] "7341 genes DE"
(Intercept) groupcNf
1 1 0
2 1 0
3 1 0
4 1 1
5 1 1
6 1 1
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
[1] "CNF_1" "CNF_3" "CNF_2" "cNf_1" "cNf_2" "cNf_3"
CNF_1 CNF_3 CNF_2 cNf_1 cNf_2 cNf_3
0.9891655 0.9317337 1.0004327 1.0190222 1.0502432 1.0094027
[1] "7140 genes DE"
(Intercept) groupCnF
1 1 0
2 1 0
3 1 0
4 1 1
5 1 1
6 1 1
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
[1] "cNF_3" "cNF_2" "cNF_1" "CnF_2" "CnF_1" "CnF_3"
cNF_3 cNF_2 cNF_1 CnF_2 CnF_1 CnF_3
0.9711423 0.9726028 0.9418377 1.0477020 1.0291049 1.0376104
[1] "2855 genes DE"
(Intercept) groupCnF
1 1 0
2 1 0
3 1 0
4 1 1
5 1 1
6 1 1
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
[1] "CNF_1" "CNF_3" "CNF_2" "CnF_2" "CnF_1" "CnF_3"
CNF_1 CNF_3 CNF_2 CnF_2 CnF_1 CnF_3
0.9793780 0.9377198 0.9911882 1.0424709 1.0209230 1.0283201
[1] "1811 genes DE"
(Intercept) groupCnf
1 1 0
2 1 0
3 1 0
4 1 1
5 1 1
6 1 1
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
[1] "cNF_3" "cNF_2" "cNF_1" "Cnf_3" "Cnf_1" "Cnf_2"
cNF_3 cNF_2 cNF_1 Cnf_3 Cnf_1 Cnf_2
0.9967780 1.0011218 0.9585024 0.9781775 1.0283345 1.0370859
[1] "8264 genes DE"
(Intercept) groupCnf
1 1 0
2 1 0
3 1 0
4 1 1
5 1 1
6 1 1
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
[1] "CNF_1" "CNF_3" "CNF_2" "Cnf_3" "Cnf_1" "Cnf_2"
CNF_1 CNF_3 CNF_2 Cnf_3 Cnf_1 Cnf_2
1.0008911 0.9447685 1.0119428 0.9823882 1.0318759 1.0281336
[1] "8309 genes DE"
(Intercept) groupCNf
1 1 0
2 1 0
3 1 0
4 1 1
5 1 1
6 1 1
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
[1] "cNF_3" "cNF_2" "cNF_1" "CNf_1" "CNf_3" "CNf_2"
cNF_3 cNF_2 cNF_1 CNf_1 CNf_3 CNf_2
0.9776759 0.9814592 0.9231468 1.0225871 1.0501686 1.0449624
[1] "7401 genes DE"
(Intercept) groupCNf
1 1 0
2 1 0
3 1 0
4 1 1
5 1 1
6 1 1
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
[1] "CNF_1" "CNF_3" "CNF_2" "CNf_1" "CNf_3" "CNf_2"
CNF_1 CNF_3 CNF_2 CNf_1 CNf_3 CNf_2
0.9778008 0.9263592 0.9875116 1.0165429 1.0509593 1.0408262
[1] "7531 genes DE"
[1] "Pourcentage des gènes DE qui ne sont pas partagés par les deux comparaisons :"
[1] 55.36533 23.40446 27.81213 57.43966 25.25306 27.71432
Il semble qu’il y ait quand même pas mal de différences entre ces transcriptomes, car quand on les compare au même troisième transcriptome on a une différence de 1000 gènes (1900 contre 2800). Le diagramme de Venn montre que ces gènes sont en partie différents. Les transcriptômes ont bien l’air différents. Si on prend un autre x :
(Intercept) groupcNf
1 1 0
2 1 0
3 1 0
4 1 1
5 1 1
6 1 1
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
[1] "cNF_3" "cNF_2" "cNF_1" "cNf_1" "cNf_2" "cNf_3"
cNF_3 cNF_2 cNF_1 cNf_1 cNf_2 cNf_3
0.9844303 0.9894266 0.9466501 1.0201203 1.0475235 1.0118491
[1] "7341 genes DE"
(Intercept) groupcNf
1 1 0
2 1 0
3 1 0
4 1 1
5 1 1
6 1 1
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
[1] "CNF_1" "CNF_3" "CNF_2" "cNf_1" "cNf_2" "cNf_3"
CNF_1 CNF_3 CNF_2 cNf_1 cNf_2 cNf_3
0.9891655 0.9317337 1.0004327 1.0190222 1.0502432 1.0094027
[1] "7140 genes DE"
double <- list(`ambiant CO2` = genes7$gene_id, `elevanted CO2` = genes8$gene_id)
ggVennDiagram(double) ensembl_gene_id
1 AT1G01190
2 AT1G01720
3 AT1G02575
4 AT1G02850
5 AT1G03870
6 AT1G05660
7 AT1G05680
8 AT1G06120
9 AT1G07680
10 AT1G07690
11 AT1G09750
12 AT1G09932
13 AT1G10385
14 AT1G11545
15 AT1G12030
16 AT1G12080
17 AT1G12090
18 AT1G12200
description
1 CYP78A8 [Source:UniProtKB/TrEMBL;Acc:A0A178WIC4]
2 NAC domain-containing protein 2 [Source:UniProtKB/Swiss-Prot;Acc:Q39013]
3 unknown protein; BEST Arabidopsis thaliana protein match is: unknown protein (TAIR:AT1G02570.1). [Source:TAIR;Acc:AT1G02575]
4 Beta-glucosidase 11 [Source:UniProtKB/Swiss-Prot;Acc:B3H5Q1]
5 At1g03870 [Source:UniProtKB/TrEMBL;Acc:B3LF88]
6 F3F20.11 protein [Source:UniProtKB/TrEMBL;Acc:Q9SYK7]
7 Glycosyltransferase (Fragment) [Source:UniProtKB/TrEMBL;Acc:A0A0K1SBE8]
8 Delta-9 desaturase-like 3 protein [Source:UniProtKB/Swiss-Prot;Acc:Q9FPD5]
9 F24B9.23 [Source:UniProtKB/TrEMBL;Acc:Q9LQP3]
10 FUNCTIONS IN: molecular_function unknown; INVOLVED IN: biological_process unknown; LOCATED IN: endomembrane system; EXPRESSED IN: root; CONTAINS InterPro DOMAIN/s: Orthoreovirus membrane fusion p10 (InterPro:IPR009854); BEST Arabidopsis thaliana pro /.../atch is: unknown protein (TAIR:AT1G54950.1); Ha. [Source:TAIR;Acc:AT1G07690]
11 Aspartyl protease AED3 [Source:UniProtKB/Swiss-Prot;Acc:O04496]
12 Phosphoglycerate mutase family protein [Source:UniProtKB/TrEMBL;Acc:Q8GWG7]
13 Exocyst complex component EXO84A [Source:UniProtKB/Swiss-Prot;Acc:F4I4B6]
14 Xyloglucan endotransglucosylase/hydrolase [Source:UniProtKB/TrEMBL;Acc:A0A178W0W2]
15 At1g12030 [Source:UniProtKB/TrEMBL;Acc:O65376]
16 At1g12080 [Source:UniProtKB/TrEMBL;Acc:O65370]
17 ELP [Source:UniProtKB/TrEMBL;Acc:A0A178W1A4]
18 Flavin-containing monooxygenase [Source:UniProtKB/TrEMBL;Acc:A0A178W1K3]
external_gene_name entrezgene_id
1 CYP78A8 839233
2 NAC002 839265
3 10723100
4 BGLU11 839435
5 FLA9 839384
6 837073
7 UGT74E2 837075
8 837121
9 837281
10 837282
11 AED3 837504
12 837526
13 EXO84A 837578
14 XTH8 837698
15 837755
16 837760
17 ELP 837761
18 837773
[ reached 'max' / getOption("max.print") -- omitted 6047 rows ]
On va faire les corrélations d’expression entre ces deux conditions.
scatter <- function(df, x, y) {
sp <- ggplot(df, aes(log(df[, x]), log(df[, y]))) + geom_bin2d(bins = 70) + scale_fill_gradient2() +
theme(legend.position = "none") + labs(x = x, y = y)
return(sp)
}
comps <- labels <- c("cNF", "CNF")
diffs <- c()
# correlations between ambient and elevated
for (ambient in colnames(data)[grepl("cNF", colnames(data))]) {
for (elevated in colnames(data)[grepl("CNF", colnames(data))]) {
diffs = c(diffs, cor(data[ambient], data[elevated]))
}
}
diffs_other <- c()
for (ambient in colnames(data)[grepl("cNF", colnames(data))]) {
for (elevated in colnames(data)[!grepl("NF", colnames(data))]) {
diffs_other = c(diffs_other, cor(data[ambient], data[elevated]))
}
}
# correlations within ambien and elevated
sames <- c()
samples <- colnames(data)[grepl("cNF", colnames(data))]
sames <- c(sames, cor(data[, samples[1]], data[, samples[2]]), cor(data[, samples[1]],
data[, samples[3]]), cor(data[, samples[2]], data[, samples[3]]))
samples <- colnames(data)[grepl("CNF", colnames(data))]
sames <- c(sames, cor(data[, samples[1]], data[, samples[2]]), cor(data[, samples[1]],
data[, samples[3]]), cor(data[, samples[2]], data[, samples[3]]))
sames[1] 0.9938199 0.9906937 0.9810858 0.9860430 0.9921428 0.9860068
df <- data.frame(`Same condition` = c(sames, rep(NA, 3)), `Ambient vs Elevated` = diffs)
res <- na.omit(melt(df))
# comparisons
ggplot(data = res, aes(x = variable, y = value, fill = variable)) + geom_boxplot(binaxis = "y",
alpha = 0.4, stackdir = "center") + theme(axis.text.x = element_text(angle = 320,
hjust = 0, colour = "grey50"), plot.title = element_text(size = 14, face = "bold")) +
ggtitle("Correlation for expression accross conditions") + geom_dotplot(binaxis = "y",
stackdir = "center") + stat_compare_means(method = "wilcox.test", hide.ns = FALSE,
label = "p.signif", symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05,
1), symbols = c("****", "***", "**", "*", "ns")))res <- rbind.data.frame(res, data.frame(variable = rep("cNF vs all others except CNF",
length(diffs_other)), value = diffs_other))
ggplot(data = res, aes(x = variable, y = value, fill = variable)) + geom_boxplot(binaxis = "y",
alpha = 0.4, stackdir = "center") + theme(axis.text.x = element_text(angle = 320,
hjust = 0, colour = "grey50"), plot.title = element_text(size = 14, face = "bold")) +
ggtitle("Correlation for expression accross conditions") + geom_dotplot(binaxis = "y",
stackdir = "center") + stat_compare_means(method = "wilcox.test", hide.ns = FALSE,
label = "p.signif", symnum.args = list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05,
1), symbols = c("****", "***", "**", "*", "ns"))) cNF et CNF semblent bien plus proches entre eux que cNF ne l’est avec tous les autres.